In [13]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.figure_factory as ff
import seaborn as sns
In [2]:
# Load the orders workbook into a DataFrame.
Orders = pd.read_excel(io='retail_orders_W23.xlsx')
In [3]:
# Load the store workbook into a DataFrame.
Store = pd.read_excel(io='store.xlsx')
In [4]:
# Load the retail CSV into a DataFrame.
Retail_Data = pd.read_csv(filepath_or_buffer='retail_data_W23 - retail_data_W23.csv')
In [5]:
# Annotated heatmap of the pairwise correlations between Orders' numeric columns.
# numeric_only=True is needed on pandas >= 2.0, where corr() no longer drops
# non-numeric columns silently and raises on them instead.
ax = sns.heatmap(
    Orders.corr(numeric_only=True),
    annot=True,
)
ax.set_title('Orders: correlation matrix')
plt.show()
In [6]:
# Annotated heatmap of the pairwise correlations between Store's numeric columns.
# numeric_only=True is needed on pandas >= 2.0, where corr() no longer drops
# non-numeric columns silently and raises on them instead.
ax = sns.heatmap(
    Store.corr(numeric_only=True),
    annot=True,
)
ax.set_title('Store: correlation matrix')
plt.show()
In [7]:
# Remove the Promo2 column from Store.
# NOTE(review): presumably motivated by the Store correlation heatmap shown just before this cell - confirm.
# Rebinding (rather than inplace=True) together with errors='ignore' keeps this
# cell idempotent: re-running it after Promo2 is already gone no longer raises KeyError.
Store = Store.drop(columns=['Promo2'], errors='ignore')
In [8]:
# Same Store correlation heatmap, re-drawn after the Promo2 column was dropped.
# numeric_only=True is needed on pandas >= 2.0, where corr() no longer drops
# non-numeric columns silently and raises on them instead.
ax = sns.heatmap(
    Store.corr(numeric_only=True),
    annot=True,
)
ax.set_title('Store: correlation matrix (Promo2 dropped)')
plt.show()
In [9]:
# Annotated heatmap of the pairwise correlations between Retail_Data's numeric columns.
# numeric_only=True is needed on pandas >= 2.0, where corr() raises on
# non-numeric columns (presumably Date at this point, which is only parsed
# with pd.to_datetime in a later cell - confirm against the CSV).
ax = sns.heatmap(
    Retail_Data.corr(numeric_only=True),
    annot=True,
)
ax.set_title('Retail data: correlation matrix')
plt.show()
In [11]:
# Mean of Customers for each Store, kept as a flat frame with columns
# Store and Customers (the group key stays a regular column).
avg_customers_by_store = Retail_Data.groupby('Store', as_index=False)['Customers'].mean()
In [14]:
# One point per store: x is the Store value, y its mean Customers.
# Hovering a point shows both fields.
fig = px.scatter(
    data_frame=avg_customers_by_store,
    x='Store',
    y='Customers',
    title='Average Customers per Store',
    hover_data=['Store', 'Customers'],
)
fig.show()
In [15]:
# Parse Date into datetimes so the x-axis is a real time axis.
Retail_Data['Date'] = pd.to_datetime(Retail_Data['Date'])

# Sum Customers over all rows that share a date, so the line shows the daily
# total the title promises. Plotting the raw rows instead would draw one
# zigzagging line through every row whenever a date appears more than once.
# groupby also returns the dates in sorted order.
daily_customers = Retail_Data.groupby('Date', as_index=False)['Customers'].sum()

fig = px.line(daily_customers, x='Date', y='Customers', title='Daily Total Customers over Time')
fig.show()
In [21]:
# Distribution plot of the Customers column, using bins 100 wide and no rug.
# `ff` (plotly.figure_factory) is imported in the notebook's import cell.
fig = ff.create_distplot([Retail_Data['Customers']], ['Customers'], bin_size=100, show_rug=False)
fig.update_layout(title='Distribution of Customers')
fig.show()
# Author's observation: this graph shows that the customer data is right-skewed.
In [22]:
# Mean Customers for every (DayOfWeek, Promo) combination, in long format.
heatmap_data = Retail_Data.groupby(['DayOfWeek', 'Promo'])['Customers'].mean().reset_index()

# Reshape to a DayOfWeek x Promo grid. pivot() arguments are keyword-only
# since pandas 2.0; the positional form raises TypeError there.
customers_by_day_promo = heatmap_data.pivot(index='DayOfWeek', columns='Promo', values='Customers')

# NOTE(review): the x/y labels below replace the pivot's own column/index
# values purely by position. They assume exactly two Promo values (lowest =
# no promo) and seven DayOfWeek values whose lowest code is Sunday - confirm
# against the dataset's DayOfWeek encoding before trusting the axis labels.
fig = px.imshow(
    customers_by_day_promo,
    x=['No Promo', 'Promo'],
    y=['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'],
    labels=dict(x="Promo", y="Day of Week", color="Customers"),
    title='Customers by Day of Week and Promo',
)
fig.show()
In [ ]: